# -*- coding: utf-8 -*-
"""Lasso.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1dSw6I4ByFH_qedHQ0MWmROJhBk2RyHIp
"""

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# 1. Generate synthetic data in which only a few features influence y
np.random.seed(42)  # fixed seed so every run draws the same data
n_samples = 100
noise_std = 0.5  # scale applied to the standard-normal noise added to y

# Single source of truth for the informative features: the feature counts are
# derived from this list, so editing it can never leave the coefficient vector
# and the feature matrix with mismatched widths.
relevant_coefficients = [2.5, -1.0, 3.0]
n_relevant_features = len(relevant_coefficients)
n_irrelevant_features = 7  # These features will have no true effect on y
n_total_features = n_relevant_features + n_irrelevant_features

# Create features: the relevant ones occupy the first columns, the rest are irrelevant
X = np.random.rand(n_samples, n_total_features)

# True coefficients: the relevant values followed by zeros for the irrelevant features
true_coefficients = np.array(relevant_coefficients + [0.0] * n_irrelevant_features)
# Linear signal plus Gaussian noise (drawn after X so the random stream order is fixed)
y = X @ true_coefficients + np.random.randn(n_samples) * noise_std

# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardize the features (important for regularization methods). The scaler
# is fitted on the training split only and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 2. Lasso regression over several regularization strengths

# alpha controls the regularization strength: a larger alpha means stronger
# regularization and more coefficients going to zero.
alphas = [0.001, 0.01, 0.1, 1.0]  # Lasso usually uses smaller alphas than Ridge

print("--- Lasso Regression ---")
for alpha in alphas:
    # max_iter is increased to help the solver converge
    lasso_model = Lasso(alpha=alpha, max_iter=10000).fit(X_train_scaled, y_train)
    y_pred_lasso = lasso_model.predict(X_test_scaled)

    # Evaluate on the held-out test split
    mse_lasso = mean_squared_error(y_test, y_pred_lasso)
    r2_lasso = r2_score(y_test, y_pred_lasso)

    # Assemble the per-alpha report and emit it in a single print call
    report_lines = [
        f"\nAlpha: {alpha}",
        f"Lasso Coefficients (rounded to 3 decimal places): {np.round(lasso_model.coef_, 3)}",
        f"Lasso Intercept: {lasso_model.intercept_:.4f}",
        f"Lasso Mean Squared Error: {mse_lasso:.4f}",
        f"Lasso R-squared: {r2_lasso:.4f}",
        f"Number of non-zero coefficients: {np.count_nonzero(lasso_model.coef_)}",
    ]
    print("\n".join(report_lines))

# 3. Baseline: standard Linear Regression (OLS) on the same scaled data
print("\n--- Linear Regression (OLS) ---")
linear_model = LinearRegression().fit(X_train_scaled, y_train)
y_pred_linear = linear_model.predict(X_test_scaled)

# Same test-set metrics as reported for Lasso
mse_linear = mean_squared_error(y_test, y_pred_linear)
r2_linear = r2_score(y_test, y_pred_linear)

# One print call; sep="\n" puts each entry on its own line
print(
    f"Linear Regression Coefficients (rounded to 3 decimal places): {np.round(linear_model.coef_, 3)}",
    f"Linear Regression Intercept: {linear_model.intercept_:.4f}",
    f"Linear Regression Mean Squared Error: {mse_linear:.4f}",
    f"Linear Regression R-squared: {r2_linear:.4f}",
    sep="\n",
)

# 4. Ridge Regression, included to compare coefficient behavior with Lasso
print("\n--- Ridge Regression (for comparison) ---")
# An alpha that might be reasonable for Ridge, often higher than for Lasso
ridge_model = Ridge(alpha=1.0).fit(X_train_scaled, y_train)
y_pred_ridge = ridge_model.predict(X_test_scaled)

# Same test-set metrics as reported for the other models
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

# Collect the report lines, then print them one per line
ridge_report = (
    f"Ridge Coefficients (rounded to 3 decimal places): {np.round(ridge_model.coef_, 3)}",
    f"Ridge Intercept: {ridge_model.intercept_:.4f}",
    f"Ridge Mean Squared Error: {mse_ridge:.4f}",
    f"Ridge R-squared: {r2_ridge:.4f}",
    f"Number of non-zero coefficients: {np.count_nonzero(ridge_model.coef_)}",
)
for report_line in ridge_report:
    print(report_line)


# Optional: plot each Lasso coefficient against alpha (regularization strength)
n_alphas = 200
alphas_plot = np.logspace(-4, 0, n_alphas)  # log-spaced grid from 1e-4 to 1

# Fit one Lasso model per alpha and keep its coefficient vector
coefs = [
    Lasso(alpha=strength, fit_intercept=True, max_iter=10000)
    .fit(X_train_scaled, y_train)
    .coef_
    for strength in alphas_plot
]

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(alphas_plot, coefs)  # one line per feature
ax.set_xscale("log")
# Swap the x limits so the axis is reversed
x_low, x_high = ax.get_xlim()
ax.set_xlim(x_high, x_low)
ax.set_xlabel("alpha (regularization strength)")
ax.set_ylabel("Coefficients")
ax.set_title("Lasso Coefficients as a Function of Regularization")
ax.legend([f"Feature {i}" for i in range(n_total_features)])
ax.axis("tight")
plt.show()